home *** CD-ROM | disk | FTP | other *** search
- *
- * $VER: cos.s 33.1 (22.1.97)
- *
- * calculates the sine or cosine of the source operand
- *
- * Version history:
- *
- * 33.1 22.1.97 (c) Motorola
- *
- * - snipped from M68060SP
- *
- * 33.2 24.1.97 laukkanen
- *
- * - cos(0) was broken, found out this when trying
- * to link mp3decoder with the new library, fixed.
- *
-
- machine 68040
- fpu 1
-
- XDEF _sin
- XDEF @sin
- XDEF _cos
- XDEF @cos
- XDEF PITBL
-
- *************************************************************************
- * sin(): computes the sine of a normalized input *
- * cos(): computes the cosine of a normalized input *
- * *
- * INPUT *************************************************************** *
- * fp0 = extended precision input *
- * *
- * OUTPUT ************************************************************** *
- * fp0 = sin(X) or cos(X) *
- * *
- * ACCURACY and MONOTONICITY ******************************************* *
- * The returned result is within 1 ulp in 64 significant bit, i.e. *
- * within 0.5001 ulp to 53 bits if the result is subsequently *
- * rounded to double precision. The result is provably monotonic *
- * in double precision. *
- * *
- * ALGORITHM *********************************************************** *
- * *
- * SIN and COS: *
- * 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. *
- * *
- * 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. *
- * *
- * 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let *
- * k = N mod 4, so in particular, k = 0,1,2,or 3. *
- * Overwrite k by k := k + AdjN. *
- * *
- * 4. If k is even, go to 6. *
- * *
- * 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. *
- * Return sgn*cos(r) where cos(r) is approximated by an *
- * even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), *
- * s = r*r. *
- * Exit. *
- * *
- * 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) *
- * where sin(r) is approximated by an odd polynomial in r *
- * r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. *
- * Exit. *
- * *
- * 7. If |X| > 1, go to 9. *
- * *
- * 8. (|X|<2**(-40)) If SIN is invoked, return X; *
- * otherwise return 1. *
- * *
- * 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, *
- * go back to 3. *
- * *
- *************************************************************************
-
- SINA7 dc.l $BD6AAA77,$CCC994F5
- SINA6 dc.l $3DE61209,$7AAE8DA1
- SINA5 dc.l $BE5AE645,$2A118AE4
- SINA4 dc.l $3EC71DE3,$A5341531
- SINA3 dc.l $BF2A01A0,$1A018B59,$00000000,$00000000
- SINA2 dc.l $3FF80000,$88888888,$888859AF,$00000000
- SINA1 dc.l $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
- TWOBYPI dc.l $3FE45F30,$6DC9C883
- COSB8 dc.l $3D2AC4D0,$D6011EE3
- COSB7 dc.l $BDA9396F,$9F45AC19
- COSB6 dc.l $3E21EED9,$0612C972
- COSB5 dc.l $BE927E4F,$B79D9FCF
- COSB4 dc.l $3EFA01A0,$1A01D423,$00000000,$00000000
- COSB3 dc.l $BFF50000,$B60B60B6,$0B61D438,$00000000
- COSB2 dc.l $3FFA0000,$AAAAAAAA,$AAAAAB5E
- COSB1 dc.l $BF000000
-
- PITBL dc.l $C0040000,$C90FDAA2,$2168C235,$21800000
- dc.l $C0040000,$C2C75BCD,$105D7C23,$A0D00000
- dc.l $C0040000,$BC7EDCF7,$FF523611,$A1E80000
- dc.l $C0040000,$B6365E22,$EE46F000,$21480000
- dc.l $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
- dc.l $C0040000,$A9A56078,$CC3063DD,$21FC0000
- dc.l $C0040000,$A35CE1A3,$BB251DCB,$21100000
- dc.l $C0040000,$9D1462CE,$AA19D7B9,$A1580000
- dc.l $C0040000,$96CBE3F9,$990E91A8,$21E00000
- dc.l $C0040000,$90836524,$88034B96,$20B00000
- dc.l $C0040000,$8A3AE64F,$76F80584,$A1880000
- dc.l $C0040000,$83F2677A,$65ECBF73,$21C40000
- dc.l $C0030000,$FB53D14A,$A9C2F2C2,$20000000
- dc.l $C0030000,$EEC2D3A0,$87AC669F,$21380000
- dc.l $C0030000,$E231D5F6,$6595DA7B,$A1300000
- dc.l $C0030000,$D5A0D84C,$437F4E58,$9FC00000
- dc.l $C0030000,$C90FDAA2,$2168C235,$21000000
- dc.l $C0030000,$BC7EDCF7,$FF523611,$A1680000
- dc.l $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
- dc.l $C0030000,$A35CE1A3,$BB251DCB,$20900000
- dc.l $C0030000,$96CBE3F9,$990E91A8,$21600000
- dc.l $C0030000,$8A3AE64F,$76F80584,$A1080000
- dc.l $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
- dc.l $C0020000,$E231D5F6,$6595DA7B,$A0B00000
- dc.l $C0020000,$C90FDAA2,$2168C235,$20800000
- dc.l $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
- dc.l $C0020000,$96CBE3F9,$990E91A8,$20E00000
- dc.l $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
- dc.l $C0010000,$C90FDAA2,$2168C235,$20000000
- dc.l $C0010000,$96CBE3F9,$990E91A8,$20600000
- dc.l $C0000000,$C90FDAA2,$2168C235,$1F800000
- dc.l $BFFF0000,$C90FDAA2,$2168C235,$1F000000
- dc.l $00000000,$00000000,$00000000,$00000000
- dc.l $3FFF0000,$C90FDAA2,$2168C235,$9F000000
- dc.l $40000000,$C90FDAA2,$2168C235,$9F800000
- dc.l $40010000,$96CBE3F9,$990E91A8,$A0600000
- dc.l $40010000,$C90FDAA2,$2168C235,$A0000000
- dc.l $40010000,$FB53D14A,$A9C2F2C2,$9F000000
- dc.l $40020000,$96CBE3F9,$990E91A8,$A0E00000
- dc.l $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
- dc.l $40020000,$C90FDAA2,$2168C235,$A0800000
- dc.l $40020000,$E231D5F6,$6595DA7B,$20B00000
- dc.l $40020000,$FB53D14A,$A9C2F2C2,$9F800000
- dc.l $40030000,$8A3AE64F,$76F80584,$21080000
- dc.l $40030000,$96CBE3F9,$990E91A8,$A1600000
- dc.l $40030000,$A35CE1A3,$BB251DCB,$A0900000
- dc.l $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
- dc.l $40030000,$BC7EDCF7,$FF523611,$21680000
- dc.l $40030000,$C90FDAA2,$2168C235,$A1000000
- dc.l $40030000,$D5A0D84C,$437F4E58,$1FC00000
- dc.l $40030000,$E231D5F6,$6595DA7B,$21300000
- dc.l $40030000,$EEC2D3A0,$87AC669F,$A1380000
- dc.l $40030000,$FB53D14A,$A9C2F2C2,$A0000000
- dc.l $40040000,$83F2677A,$65ECBF73,$A1C40000
- dc.l $40040000,$8A3AE64F,$76F80584,$21880000
- dc.l $40040000,$90836524,$88034B96,$A0B00000
- dc.l $40040000,$96CBE3F9,$990E91A8,$A1E00000
- dc.l $40040000,$9D1462CE,$AA19D7B9,$21580000
- dc.l $40040000,$A35CE1A3,$BB251DCB,$A1100000
- dc.l $40040000,$A9A56078,$CC3063DD,$A1FC0000
- dc.l $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
- dc.l $40040000,$B6365E22,$EE46F000,$A1480000
- dc.l $40040000,$BC7EDCF7,$FF523611,$21E80000
- dc.l $40040000,$C2C75BCD,$105D7C23,$20D00000
- dc.l $40040000,$C90FDAA2,$2168C235,$A1800000
-
- X EQU -12
- XFRAC EQU X+4
- POSNEG1 EQU -16
- ADJN EQU -20
- INT EQU -16
- TEMP_SIZE EQU 20
-
- ********************************************
- _sin
- fmove.d (4,sp),fp0
- @sin
- link a0,#-TEMP_SIZE
- clr.l (ADJN,a0) ; yes; SET ADJN TO 0
- bra.b SINBGN
-
- ********************************************
-
- _cos
- fmove.d (4,sp),fp0
- @cos
- link a0,#-TEMP_SIZE
- moveq #1,d0
- move.l d0,(ADJN,a0) ; yes; SET ADJN TO 1
-
- ********************************************
-
- SINBGN
-
- *--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
-
- fmove.x fp0,(X,a0) ; save input at X
-
- * "COMPACTIFY" X
- move.l (X,a0),d1 ; put exp in hi word
- move.w (X+4,a0),d1 ; fetch hi(man)
- and.l #$7FFFFFFF,d1 ; strip sign
-
- cmpi.l #$3FD78000,d1 ; is |X| >= 2**(-40)?
- bge.b .SOK1 ; no
- bra.w .SINSM ; yes; input is very small
-
- .SOK1
- cmp.l #$4004BC7E,d1 ; is |X| < 15 PI?
- blt.b .SINMAIN ; no
- bra.w .SREDUCEX ; yes; input is very large
-
- *--THIS IS THE USUAL CASE, |X| <= 15 PI.
- *--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
- .SINMAIN
- fmove.x fp0,fp1
- fmul.d (TWOBYPI,pc),fp1 ; X*2/PI
- lea (PITBL+$200,pc),a1 ; TABLE OF N*PI/2, N = -32,...,32
-
- fmove.l fp1,(INT,a0) ; CONVERT TO INTEGER
-
- move.l (INT,a0),d1 ; make a copy of N
- asl.l #4,d1 ; N *= 16
- add.l d1,a1 ; tbl_addr = a1 + (N*16)
-
- * A1 IS THE ADDRESS OF N*PIBY2
- * ...WHICH IS IN TWO PIECES Y1 # Y2
- fsub.x (a1)+,fp0 ; X-Y1
- fsub.s (a1),fp0 ; fp0 = R = (X-Y1)-Y2
- asr.l #4,d1
- .SINCONT
- *--continuation from REDUCEX
-
- *--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
- add.l (ADJN,a0),d1 ; SEE IF D0 IS ODD OR EVEN
- ror.l #1,d1 ; D0 WAS ODD IFF D0 IS NEGATIVE
- tst.l d1
- blt.w .COSPOLY
-
- *--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
- *--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
- *--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
- *--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
- *--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
- *--WHERE T=S*S.
- *--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
- *--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
- .SINPOLY
- fmovem.x fp2/fp3,-(sp) ; save fp2/fp3
-
- fmove.x fp0,(X,a0) ; X IS R
- fmul.x fp0,fp0 ; FP0 IS S
-
- fmove.d (SINA7,pc),fp3
- fmove.d (SINA6,pc),fp2
-
- fmove.x fp0,fp1
- fmul.x fp1,fp1 ; FP1 IS T
-
- ror.l #1,d1
- and.l #$80000000,d1
- * ...LEAST SIG. BIT OF D0 IN SIGN POSITION
- eor.l d1,(X,a0) ; X IS NOW R'= SGN*R
- fmul.x fp1,fp3 ; TA7
- fmul.x fp1,fp2 ; TA6
-
- fadd.d (SINA5,pc),fp3 ; A5+TA7
- fadd.d (SINA4,pc),fp2 ; A4+TA6
-
- fmul.x fp1,fp3 ; T(A5+TA7)
- fmul.x fp1,fp2 ; T(A4+TA6)
-
- fadd.d (SINA3,pc),fp3 ; A3+T(A5+TA7)
- fadd.x (SINA2,pc),fp2 ; A2+T(A4+TA6)
-
- fmul.x fp3,fp1 ; T(A3+T(A5+TA7))
-
- fmul.x fp0,fp2 ; S(A2+T(A4+TA6))
- fadd.x (SINA1,pc),fp1 ; A1+T(A3+T(A5+TA7))
- fmul.x (X,a0),fp0 ; R'*S
-
- fadd.x fp2,fp1 ; [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
-
- fmul.x fp1,fp0 ; SIN(R')-R'
-
- fmovem.x (sp)+,fp2/fp3 ; restore fp2/fp3
-
- fadd.x (X,a0),fp0 ; last inst - possible exception set
- unlk a0
- rts
-
- *--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
- *--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
- *--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
- *--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
- *--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
- *--WHERE T=S*S.
- *--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
- *--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
- *--AND IS THEREFORE STORED AS SINGLE PRECISION.
- .COSPOLY
- fmovem.x fp2/fp3,-(sp) ; save fp2/fp3
-
- fmul.x fp0,fp0 ; FP0 IS S
-
- fmove.d (COSB8,pc),fp2
- fmove.d (COSB7,pc),fp3
-
- fmove.x fp0,fp1
- fmul.x fp1,fp1 ; FP1 IS T
- fmove.x fp0,(X,a0) ; X IS S
- ror.l #1,d1
- and.l #$80000000,d1
- * ...LEAST SIG. BIT OF D0 IN SIGN POSITION
-
- fmul.x fp1,fp2 ; TB8
-
- eor.l d1,(X,a0) ; X IS NOW S'= SGN*S
- and.l #$80000000,d1
-
- fmul.x fp1,fp3 ; TB7
-
- or.l #$3F800000,d1 ; D0 IS SGN IN SINGLE
- move.l d1,(POSNEG1,a0)
-
- fadd.d (COSB6,pc),fp2 ; B6+TB8
- fadd.d (COSB5,pc),fp3 ; B5+TB7
-
- fmul.x fp1,fp2 ; T(B6+TB8)
- fmul.x fp1,fp3 ; T(B5+TB7)
-
- fadd.d (COSB4,pc),fp2 ; B4+T(B6+TB8)
- fadd.x (COSB3,pc),fp3 ; B3+T(B5+TB7)
-
- fmul.x fp1,fp2 ; T(B4+T(B6+TB8))
- fmul.x fp3,fp1 ; T(B3+T(B5+TB7))
-
- fadd.x (COSB2,pc),fp2 ; B2+T(B4+T(B6+TB8))
- fadd.s (COSB1,pc),fp1 ; B1+T(B3+T(B5+TB7))
-
- fmul.x fp2,fp0 ; S(B2+T(B4+T(B6+TB8)))
-
- fadd.x fp1,fp0
-
- fmul.x (X,a0),fp0
-
- fmovem.x (sp)+,fp2/fp3 ; restore fp2/fp3
-
- fadd.s (POSNEG1,a0),fp0 ; last inst - possible exception set
- unlk a0
- rts
-
- **********************************************
-
- * SINe: Big OR Small?
- *--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
- *--IF |X| < 2**(-40), RETURN X OR 1.
- .SINSM
- move.l (ADJN,a0),d1
- tst.l d1
- bgt.b .COSTINY
-
- * here, the operation may underflow iff the precision is sgl or dbl.
- * extended denorms are handled through another entry point.
- .SINTINY
- fmove.x (X,a0),fp0 ; last inst - possible exception set
- unlk a0
- rts
-
- .COSTINY
- fmove.s #$3F800000,fp0 ; fp0 = 1.0
- unlk a0
- rts
-
-
-
- FP_SCR0_LO EQU -4
- FP_SCR0_HI EQU -8
- FP_SCR0_EX EQU -12
- FP_SCR0 EQU -12
- FP_SCR1_LO EQU -16
- FP_SCR1_HI EQU -20
- FP_SCR1_EX EQU -24
- FP_SCR1 EQU -24
- INARG EQU -12
- ENDFLAG EQU -28
- TWOTO63 EQU -32
-
- REDUCE_SIZE EQU 32
-
- ;--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
- ;--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
- ;--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
- .SREDUCEX
- fmovem.x fp2-fp5,-(sp) ; save {fp2-fp5}
- movem.l d2/a0,-(sp) ; save d2
- fmove.s #$00000000,fp1 ; fp1 = 0
- link a0,#-REDUCE_SIZE
-
- ;--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
- ;--there is a danger of unwanted overflow in first LOOP iteration. In this
- ;--case, reduce argument by one remainder step to make subsequent reduction
- ;--safe.
-
- cmp.l #$7ffeffff,d1 ; is arg dangerously large?
- bne.b .SLOOP ; no
-
- ; yes; create 2**16383*PI/2
-
- move.w #$7ffe,(FP_SCR0_EX,a0)
- move.l #$c90fdaa2,(FP_SCR0_HI,a0)
- clr.l (FP_SCR0_LO,a0)
-
- ; create low half of 2**16383*PI/2 at FP_SCR1
-
- move.w #$7fdc,(FP_SCR1_EX,a0)
- move.l #$85a308d3,(FP_SCR1_HI,a0)
- clr.l (FP_SCR1_LO,a0)
-
- fcmp.s #0,fp0 ; test sign of argument
- fblt.w .sred_neg
-
- or.b #$80,(FP_SCR0_EX,a0) ; positive arg
- or.b #$80,(FP_SCR1_EX,a0)
- .sred_neg
- fadd.x (FP_SCR0,a0),fp0 ; high part of reduction is ex
- .act
- fmove.x fp0,fp1 ; save high result in fp1
- fadd.x (FP_SCR1,a0),fp0 ; low part of reduction
- fsub.x fp0,fp1 ; determine low component of r
- .esult
- fadd.x (FP_SCR1,a0),fp1 ; fp0/fp1 are reduced argument
- .
-
- ;--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
- ;--integer quotient will be stored in N
- ;--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
- .SLOOP
- fmove.x fp0,(INARG,a0) ; +-2**K * F, 1 <= F < 2
- move.w (INARG,a0),d1
- move.l d1,a1 ; save a copy of D0
- and.l #$00007FFF,d1
- sub.l #$00003FFF,d1 ; d0 = K
- cmp.l #28,d1
- ble.b .SLASTLOOP
- .SCONTLOOP
- sub.l #27,d1 ; d0 = L := K-27
- clr.b (ENDFLAG,a0)
- bra.b .SWORK
- .SLASTLOOP
- clr.l d1 ; d0 = L := 0
- move.b #1,(ENDFLAG,a0)
-
- .SWORK
- ;--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
- ;--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
-
- ;--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
- ;--2**L * (PIby2_1), 2**L * (PIby2_2)
-
- move.l #$00003FFE,d2 ; BIASED EXP OF 2/PI
- sub.l d1,d2 ; BIASED EXP OF 2**(-L)*(2/PI)
-
- move.l #$A2F9836E,(FP_SCR0_HI,a0)
- move.l #$4E44152A,(FP_SCR0_LO,a0)
- move.w d2,(FP_SCR0_EX,a0) ; FP_SCR0 = 2**(-L)*(2/PI)
-
- fmove.x fp0,fp2
- fmul.x (FP_SCR0,a0),fp2 ; fp2 = X * 2**(-L)*(2/PI)
-
- ;--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
- ;--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
- ;--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
- ;--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
- ;--US THE DESIRED VALUE IN FLOATING POINT.
- move.l a1,d2
- swap d2
- and.l #$80000000,d2
- or.l #$5F000000,d2 ; d2 = SIGN(INARG)*2**63 IN SGL
- move.l d2,(TWOTO63,a0)
- fadd.s (TWOTO63,a0),fp2 ; THE FRACTIONAL PART OF FP1 IS ROUNDED
- fsub.s (TWOTO63,a0),fp2 ; fp2 = N
-
- ;--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
- move.l d1,d2 ; d2 = L
-
- add.l #$00003FFF,d2 ; BIASED EXP OF 2**L * (PI/2)
- move.w d2,(FP_SCR0_EX,a0)
- move.l #$C90FDAA2,(FP_SCR0_HI,a0)
- clr.l (FP_SCR0_LO,a0) ; FP_SCR0 = 2**(L) * Piby2_1
-
- add.l #$00003FDD,d1
- move.w d1,(FP_SCR1_EX,a0)
- move.l #$85A308D3,(FP_SCR1_HI,a0)
- clr.l (FP_SCR1_LO,a0) ; FP_SCR1 = 2**(L) * Piby2_2
-
- move.b (ENDFLAG,a0),d1
-
- ;--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
- ;--P2 = 2**(L) * Piby2_2
- fmove.x fp2,fp4 ; fp4 = N
- fmul.x (FP_SCR0,a0),fp4 ; fp4 = W = N*P1
- fmove.x fp2,fp5 ; fp5 = N
- fmul.x (FP_SCR1,a0),fp5 ; fp5 = w = N*P2
- fmove.x fp4,fp3 ; fp3 = W = N*P1
-
- ;--we want P+p = W+w but |p| <= half ulp of P
- ;--Then, we need to compute A := R-P and a := r-p
- fadd.x fp5,fp3 ; fp3 = P
- fsub.x fp3,fp4 ; fp4 = W-P
-
- fsub.x fp3,fp0 ; fp0 = A := R - P
- fadd.x fp5,fp4 ; fp4 = p = (W-P)+w
-
- fmove.x fp0,fp3 ; fp3 = A
- fsub.x fp4,fp1 ; fp1 = a := r - p
-
- ;--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
- ;--|r| <= half ulp of R.
- fadd.x fp1,fp0 ; fp0 = R := A+a
- ;--No need to calculate r if this is the last loop
- tst.b d1
- bgt.w .SRESTORE
-
- ;--Need to calculate r
- fsub.x fp0,fp3 ; fp3 = A-R
- fadd.x fp3,fp1 ; fp1 = r := (A-R)+a
- bra.w .SLOOP
-
- .SRESTORE
- fmove.l fp2,d1
- unlk a0
- movem.l (sp)+,d2/a0 ; restore d2
- fmovem.x (sp)+,fp2-fp5 ; restore {fp2-fp5}
-
- bra.w .SINCONT
-